In [1]:
import ciso8601, datetime, json, os
from notebook.services.config import ConfigManager
cm = ConfigManager()
_ = cm.update('livereveal', {
    'theme': 'beige',
    'transition': 'fade'
})

The inimitable schema library

Part 2

By @stavros

Structured data is everywhere


In [2]:
data = {
    "operation": "upload",  # "upload" or "delete"
    "timeout": 3600,  # Optional, how long the sig should be valid for.
    "md5": "deadbeefetc",  # Optional
    "files": {
        "5gbCtxlvljhx5-al": {
            "size": 65536,
            "shred_date": "2015-05-02T00:00:00Z"  # Must be a date from now up to 4 months in the future.
        },
    }
}

How do we validate it?


In [3]:
if data.get("operation") not in ["upload", "delete"]:
    raise SomeError("Operation not valid.")

try:
    timeout = int(data.get("timeout"))
except ValueError:
    raise SomeError("Timeout not a number.")

if not 0 < timeout <= 3600:
    raise SomeError("Timeout not up to one hour in the future.")

if data.get("md5") and not isinstance(data["md5"], str):
    raise SomeError("md5 is not a valid MD5 hash.")

if not isinstance(data.get("files"), dict):
    raise SomeError("files must be a dictionary.")

# etc

Is there a better way?

No.

Just kidding, of course there is. Who asks "is there a better way?" if there's no better way? No one, that's who.

Presenting the schema library.


In [4]:
from schema import Schema, And, Or, Optional, Use, SchemaError

schema = Schema({
    "foo": int,
    Optional("hello"): "hi!",
})

data = {
    "hello": "hi!",
    "foo": 3,
}

schema.validate(data)


Out[4]:
{'foo': 3, 'hello': 'hi!'}

In [5]:
schema = Schema({
    "foo": int,
    Optional("hello"): "hi!",
})

data = {
    "foo": 3,
}

schema.validate(data)


Out[5]:
{'foo': 3}

In [6]:
schema = Schema({
    "foo": int,
    Optional("hello"): "hi!",
})

data = {
    "hello": "yo",
    "foo": 3,
}

try:
    schema.validate(data)
except SchemaError as e:
    print e


'hi!' does not match 'yo'

Tricks


In [7]:
schema = Schema(And(int, fetch_user_by_id))

data = "/tmp/pythess"

try:
    print schema.validate(data)
except SchemaError as e:
    print(e)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-7-2ca8e815d87c> in <module>()
----> 1 schema = Schema(And(int, fetch_user_by_id))
      2 
      3 data = "/tmp/pythess"
      4 
      5 try:

NameError: name 'fetch_user_by_id' is not defined

In [ ]:
schema = Schema(range(10))

data = [2, 4, 6, 2, 2, 2, 20]

try:
    print schema.validate(data)
except SchemaError as e:
    print(e)

In [ ]:
schema = Schema({
    "shred_date": And(
        basestring,
        Use(ciso8601.parse_datetime_unaware),
        datetime.datetime,
        Use(lambda d: (d - datetime.datetime.now()).days),
        lambda d: 0 < d < 120,
        error="shred_date must be a valid future date string up to 120 days from now.")
})

data = {
    "shred_date": "2016-10-10T00:00:00Z",
}

try:
    print schema.validate(data)
except SchemaError as e:
    print(e)

In [ ]:
operations = {"upload": "PUT", "delete": "DELETE", "replace": "POST"}
schema = Schema(And(Use(json.loads, error="Invalid JSON"), {
        "operation": And(lambda s: s in operations.keys(), Use(operations.get), error="Valid operations are: %s" % ", ".join(operations.keys())),
        "files": {And(basestring, lambda s: len(s) > 5, error="Filename must be a string longer than 5 characters."): {
            Optional("size"): And(int, lambda i: i > 0, error="Size must be a positive integer."),
            Optional("shred_date"): And(
                basestring,  # Make sure it's a string.
                Use(ciso8601.parse_datetime_unaware),  # Parse it into a date.
                datetime.datetime,  # Make sure it's a date now.
                lambda d: 0 < (d - datetime.datetime.now()).days < 120,  # Make sure it's in the future, up to 120 days.
                error="shred_date must be a valid future date string up to 120 days from now.")
            }}}))
data = """{
    "operation": "repklace",
    "files": {
        "file.nam": {
            "size": 100,
            "shred_date": "2016-01-01T00:00:00Z"
        }}}}"""
try:
    print schema.validate(data)
except SchemaError as e:
    print e

Questions?

Thank you!